In [1]:
!pip install --user pytorch-tabnet
Requirement already satisfied: pytorch-tabnet in c:\users\asus\appdata\roaming\python\python311\site-packages (4.1.0)
Requirement already satisfied: numpy>=1.17 in c:\users\asus\anaconda3\lib\site-packages (from pytorch-tabnet) (1.26.4)
Requirement already satisfied: scikit_learn>0.21 in c:\users\asus\anaconda3\lib\site-packages (from pytorch-tabnet) (1.2.2)
Requirement already satisfied: scipy>1.4 in c:\users\asus\anaconda3\lib\site-packages (from pytorch-tabnet) (1.11.4)
Requirement already satisfied: torch>=1.3 in c:\users\asus\appdata\roaming\python\python311\site-packages (from pytorch-tabnet) (2.3.1)
Requirement already satisfied: tqdm>=4.36 in c:\users\asus\anaconda3\lib\site-packages (from pytorch-tabnet) (4.65.0)
Requirement already satisfied: joblib>=1.1.1 in c:\users\asus\anaconda3\lib\site-packages (from scikit_learn>0.21->pytorch-tabnet) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\asus\anaconda3\lib\site-packages (from scikit_learn>0.21->pytorch-tabnet) (2.2.0)
Requirement already satisfied: filelock in c:\users\asus\anaconda3\lib\site-packages (from torch>=1.3->pytorch-tabnet) (3.13.1)
Requirement already satisfied: typing-extensions>=4.8.0 in c:\users\asus\anaconda3\lib\site-packages (from torch>=1.3->pytorch-tabnet) (4.9.0)
Requirement already satisfied: sympy in c:\users\asus\anaconda3\lib\site-packages (from torch>=1.3->pytorch-tabnet) (1.12)
Requirement already satisfied: networkx in c:\users\asus\anaconda3\lib\site-packages (from torch>=1.3->pytorch-tabnet) (3.1)
Requirement already satisfied: jinja2 in c:\users\asus\anaconda3\lib\site-packages (from torch>=1.3->pytorch-tabnet) (3.1.3)
Requirement already satisfied: fsspec in c:\users\asus\anaconda3\lib\site-packages (from torch>=1.3->pytorch-tabnet) (2023.10.0)
Requirement already satisfied: mkl<=2021.4.0,>=2021.1.1 in c:\users\asus\appdata\roaming\python\python311\site-packages (from torch>=1.3->pytorch-tabnet) (2021.4.0)
Requirement already satisfied: colorama in c:\users\asus\anaconda3\lib\site-packages (from tqdm>=4.36->pytorch-tabnet) (0.4.6)
Requirement already satisfied: intel-openmp==2021.* in c:\users\asus\appdata\roaming\python\python311\site-packages (from mkl<=2021.4.0,>=2021.1.1->torch>=1.3->pytorch-tabnet) (2021.4.0)
Requirement already satisfied: tbb==2021.* in c:\users\asus\anaconda3\lib\site-packages (from mkl<=2021.4.0,>=2021.1.1->torch>=1.3->pytorch-tabnet) (2021.13.0)
Requirement already satisfied: MarkupSafe>=2.0 in c:\users\asus\anaconda3\lib\site-packages (from jinja2->torch>=1.3->pytorch-tabnet) (2.1.3)
Requirement already satisfied: mpmath>=0.19 in c:\users\asus\anaconda3\lib\site-packages (from sympy->torch>=1.3->pytorch-tabnet) (1.3.0)
In [2]:
pip show pytorch-tabnet
Name: pytorch-tabnet
Note: you may need to restart the kernel to use updated packages.

Version: 4.1.0
Summary: PyTorch implementation of TabNet
Home-page: https://github.com/dreamquark-ai/tabnet
Author: 
Author-email: 
License: 
Location: C:\Users\ASUS\AppData\Roaming\Python\Python311\site-packages
Requires: numpy, scikit_learn, scipy, torch, tqdm
Required-by: 
In [3]:
pip install xgboost
Requirement already satisfied: xgboost in c:\users\asus\anaconda3\lib\site-packages (2.0.3)
Requirement already satisfied: numpy in c:\users\asus\anaconda3\lib\site-packages (from xgboost) (1.26.4)
Requirement already satisfied: scipy in c:\users\asus\anaconda3\lib\site-packages (from xgboost) (1.11.4)
Note: you may need to restart the kernel to use updated packages.
In [4]:
pip install shap
Requirement already satisfied: shap in c:\users\asus\anaconda3\lib\site-packages (0.46.0)
Requirement already satisfied: numpy in c:\users\asus\anaconda3\lib\site-packages (from shap) (1.26.4)
Requirement already satisfied: scipy in c:\users\asus\anaconda3\lib\site-packages (from shap) (1.11.4)
Requirement already satisfied: scikit-learn in c:\users\asus\anaconda3\lib\site-packages (from shap) (1.2.2)
Requirement already satisfied: pandas in c:\users\asus\anaconda3\lib\site-packages (from shap) (2.2.2)
Requirement already satisfied: tqdm>=4.27.0 in c:\users\asus\anaconda3\lib\site-packages (from shap) (4.65.0)
Requirement already satisfied: packaging>20.9 in c:\users\asus\anaconda3\lib\site-packages (from shap) (23.1)
Requirement already satisfied: slicer==0.0.8 in c:\users\asus\anaconda3\lib\site-packages (from shap) (0.0.8)
Requirement already satisfied: numba in c:\users\asus\anaconda3\lib\site-packages (from shap) (0.59.0)
Requirement already satisfied: cloudpickle in c:\users\asus\anaconda3\lib\site-packages (from shap) (2.2.1)
Requirement already satisfied: colorama in c:\users\asus\anaconda3\lib\site-packages (from tqdm>=4.27.0->shap) (0.4.6)
Requirement already satisfied: llvmlite<0.43,>=0.42.0dev0 in c:\users\asus\anaconda3\lib\site-packages (from numba->shap) (0.42.0)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\asus\anaconda3\lib\site-packages (from pandas->shap) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\asus\anaconda3\lib\site-packages (from pandas->shap) (2023.3.post1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\asus\anaconda3\lib\site-packages (from pandas->shap) (2023.3)
Requirement already satisfied: joblib>=1.1.1 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn->shap) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn->shap) (2.2.0)
Requirement already satisfied: six>=1.5 in c:\users\asus\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas->shap) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [5]:
pip install interpret
Requirement already satisfied: interpret in c:\users\asus\anaconda3\lib\site-packages (0.6.2)
Requirement already satisfied: interpret-core==0.6.2 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.6.2)
Requirement already satisfied: numpy>=1.11.1 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core==0.6.2->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (1.26.4)
Requirement already satisfied: scipy>=0.18.1 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core==0.6.2->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (1.11.4)
Requirement already satisfied: pandas>=0.19.2 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core==0.6.2->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.2.2)
Requirement already satisfied: scikit-learn>=0.18.1 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core==0.6.2->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (1.2.2)
Requirement already satisfied: joblib>=0.11 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core==0.6.2->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (1.2.0)
Requirement already satisfied: dash>=1.0.0 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.17.1)
Requirement already satisfied: dash-core-components>=1.0.0 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.0.0)
Requirement already satisfied: dash-html-components>=1.0.0 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.0.0)
Requirement already satisfied: dash-table>=4.1.0 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (5.0.0)
Requirement already satisfied: dash-cytoscape>=0.1.1 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (1.0.1)
Requirement already satisfied: gevent>=1.3.6 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (24.2.1)
Requirement already satisfied: requests>=2.19.0 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.31.0)
Requirement already satisfied: psutil>=5.6.2 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (5.9.0)
Requirement already satisfied: ipykernel>=4.10.0 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (6.28.0)
Requirement already satisfied: ipython>=5.5.0 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (8.20.0)
Requirement already satisfied: plotly>=3.8.1 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (5.9.0)
Requirement already satisfied: SALib>=1.3.3 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (1.5.0)
Requirement already satisfied: shap>=0.28.5 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.46.0)
Requirement already satisfied: dill>=0.2.5 in c:\users\asus\anaconda3\lib\site-packages (from interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.3.8)
Requirement already satisfied: Flask<3.1,>=1.0.4 in c:\users\asus\anaconda3\lib\site-packages (from dash>=1.0.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.2.5)
Requirement already satisfied: Werkzeug<3.1 in c:\users\asus\anaconda3\lib\site-packages (from dash>=1.0.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.2.3)
Requirement already satisfied: importlib-metadata in c:\users\asus\anaconda3\lib\site-packages (from dash>=1.0.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (7.0.1)
Requirement already satisfied: typing-extensions>=4.1.1 in c:\users\asus\anaconda3\lib\site-packages (from dash>=1.0.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (4.9.0)
Requirement already satisfied: retrying in c:\users\asus\anaconda3\lib\site-packages (from dash>=1.0.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (1.3.4)
Requirement already satisfied: nest-asyncio in c:\users\asus\anaconda3\lib\site-packages (from dash>=1.0.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (1.6.0)
Requirement already satisfied: setuptools in c:\users\asus\anaconda3\lib\site-packages (from dash>=1.0.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (68.2.2)
Requirement already satisfied: zope.event in c:\users\asus\anaconda3\lib\site-packages (from gevent>=1.3.6->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (5.0)
Requirement already satisfied: zope.interface in c:\users\asus\anaconda3\lib\site-packages (from gevent>=1.3.6->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (5.4.0)
Requirement already satisfied: greenlet>=3.0rc3 in c:\users\asus\anaconda3\lib\site-packages (from gevent>=1.3.6->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (3.0.1)
Requirement already satisfied: cffi>=1.12.2 in c:\users\asus\anaconda3\lib\site-packages (from gevent>=1.3.6->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (1.16.0)
Requirement already satisfied: comm>=0.1.1 in c:\users\asus\anaconda3\lib\site-packages (from ipykernel>=4.10.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.1.2)
Requirement already satisfied: debugpy>=1.6.5 in c:\users\asus\anaconda3\lib\site-packages (from ipykernel>=4.10.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (1.6.7)
Requirement already satisfied: jupyter-client>=6.1.12 in c:\users\asus\anaconda3\lib\site-packages (from ipykernel>=4.10.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (7.4.9)
Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\users\asus\anaconda3\lib\site-packages (from ipykernel>=4.10.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (5.5.0)
Requirement already satisfied: matplotlib-inline>=0.1 in c:\users\asus\anaconda3\lib\site-packages (from ipykernel>=4.10.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.1.6)
Requirement already satisfied: packaging in c:\users\asus\anaconda3\lib\site-packages (from ipykernel>=4.10.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (23.1)
Requirement already satisfied: pyzmq>=24 in c:\users\asus\anaconda3\lib\site-packages (from ipykernel>=4.10.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (24.0.1)
Requirement already satisfied: tornado>=6.1 in c:\users\asus\anaconda3\lib\site-packages (from ipykernel>=4.10.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (6.3.3)
Requirement already satisfied: traitlets>=5.4.0 in c:\users\asus\anaconda3\lib\site-packages (from ipykernel>=4.10.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (5.7.1)
Requirement already satisfied: decorator in c:\users\asus\anaconda3\lib\site-packages (from ipython>=5.5.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (5.1.1)
Requirement already satisfied: jedi>=0.16 in c:\users\asus\anaconda3\lib\site-packages (from ipython>=5.5.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.18.1)
Requirement already satisfied: prompt-toolkit<3.1.0,>=3.0.41 in c:\users\asus\anaconda3\lib\site-packages (from ipython>=5.5.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (3.0.43)
Requirement already satisfied: pygments>=2.4.0 in c:\users\asus\anaconda3\lib\site-packages (from ipython>=5.5.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.15.1)
Requirement already satisfied: stack-data in c:\users\asus\anaconda3\lib\site-packages (from ipython>=5.5.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.2.0)
Requirement already satisfied: colorama in c:\users\asus\anaconda3\lib\site-packages (from ipython>=5.5.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.4.6)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\asus\anaconda3\lib\site-packages (from pandas>=0.19.2->interpret-core==0.6.2->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\asus\anaconda3\lib\site-packages (from pandas>=0.19.2->interpret-core==0.6.2->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2023.3.post1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\asus\anaconda3\lib\site-packages (from pandas>=0.19.2->interpret-core==0.6.2->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2023.3)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\asus\anaconda3\lib\site-packages (from plotly>=3.8.1->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (8.2.2)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\asus\anaconda3\lib\site-packages (from requests>=2.19.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in c:\users\asus\anaconda3\lib\site-packages (from requests>=2.19.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\asus\anaconda3\lib\site-packages (from requests>=2.19.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.0.7)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\asus\anaconda3\lib\site-packages (from requests>=2.19.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2024.2.2)
Requirement already satisfied: matplotlib>=3.5 in c:\users\asus\anaconda3\lib\site-packages (from SALib>=1.3.3->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (3.8.0)
Requirement already satisfied: multiprocess in c:\users\asus\anaconda3\lib\site-packages (from SALib>=1.3.3->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.70.16)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn>=0.18.1->interpret-core==0.6.2->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.2.0)
Requirement already satisfied: tqdm>=4.27.0 in c:\users\asus\anaconda3\lib\site-packages (from shap>=0.28.5->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (4.65.0)
Requirement already satisfied: slicer==0.0.8 in c:\users\asus\anaconda3\lib\site-packages (from shap>=0.28.5->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.0.8)
Requirement already satisfied: numba in c:\users\asus\anaconda3\lib\site-packages (from shap>=0.28.5->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.59.0)
Requirement already satisfied: cloudpickle in c:\users\asus\anaconda3\lib\site-packages (from shap>=0.28.5->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.2.1)
Requirement already satisfied: pycparser in c:\users\asus\anaconda3\lib\site-packages (from cffi>=1.12.2->gevent>=1.3.6->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.21)
Requirement already satisfied: Jinja2>=3.0 in c:\users\asus\anaconda3\lib\site-packages (from Flask<3.1,>=1.0.4->dash>=1.0.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (3.1.3)
Requirement already satisfied: itsdangerous>=2.0 in c:\users\asus\anaconda3\lib\site-packages (from Flask<3.1,>=1.0.4->dash>=1.0.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.0.1)
Requirement already satisfied: click>=8.0 in c:\users\asus\anaconda3\lib\site-packages (from Flask<3.1,>=1.0.4->dash>=1.0.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (8.1.7)
Requirement already satisfied: parso<0.9.0,>=0.8.0 in c:\users\asus\anaconda3\lib\site-packages (from jedi>=0.16->ipython>=5.5.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.8.3)
Requirement already satisfied: entrypoints in c:\users\asus\anaconda3\lib\site-packages (from jupyter-client>=6.1.12->ipykernel>=4.10.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.4)
Requirement already satisfied: platformdirs>=2.5 in c:\users\asus\anaconda3\lib\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel>=4.10.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (3.10.0)
Requirement already satisfied: pywin32>=300 in c:\users\asus\anaconda3\lib\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel>=4.10.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (305.1)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.5->SALib>=1.3.3->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.5->SALib>=1.3.3->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.5->SALib>=1.3.3->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (4.25.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.5->SALib>=1.3.3->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (1.4.4)
Requirement already satisfied: pillow>=6.2.0 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.5->SALib>=1.3.3->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (10.2.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.5->SALib>=1.3.3->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (3.0.9)
Requirement already satisfied: wcwidth in c:\users\asus\anaconda3\lib\site-packages (from prompt-toolkit<3.1.0,>=3.0.41->ipython>=5.5.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.2.5)
Requirement already satisfied: six>=1.5 in c:\users\asus\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas>=0.19.2->interpret-core==0.6.2->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (1.16.0)
Requirement already satisfied: MarkupSafe>=2.1.1 in c:\users\asus\anaconda3\lib\site-packages (from Werkzeug<3.1->dash>=1.0.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.1.3)
Requirement already satisfied: zipp>=0.5 in c:\users\asus\anaconda3\lib\site-packages (from importlib-metadata->dash>=1.0.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (3.17.0)
Requirement already satisfied: llvmlite<0.43,>=0.42.0dev0 in c:\users\asus\anaconda3\lib\site-packages (from numba->shap>=0.28.5->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.42.0)
Requirement already satisfied: executing in c:\users\asus\anaconda3\lib\site-packages (from stack-data->ipython>=5.5.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.8.3)
Requirement already satisfied: asttokens in c:\users\asus\anaconda3\lib\site-packages (from stack-data->ipython>=5.5.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (2.0.5)
Requirement already satisfied: pure-eval in c:\users\asus\anaconda3\lib\site-packages (from stack-data->ipython>=5.5.0->interpret-core[dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.2->interpret) (0.2.2)
Note: you may need to restart the kernel to use updated packages.
In [6]:
pip install torch
Requirement already satisfied: torch in c:\users\asus\appdata\roaming\python\python311\site-packages (2.3.1)
Requirement already satisfied: filelock in c:\users\asus\anaconda3\lib\site-packages (from torch) (3.13.1)
Requirement already satisfied: typing-extensions>=4.8.0 in c:\users\asus\anaconda3\lib\site-packages (from torch) (4.9.0)
Requirement already satisfied: sympy in c:\users\asus\anaconda3\lib\site-packages (from torch) (1.12)
Requirement already satisfied: networkx in c:\users\asus\anaconda3\lib\site-packages (from torch) (3.1)
Requirement already satisfied: jinja2 in c:\users\asus\anaconda3\lib\site-packages (from torch) (3.1.3)
Requirement already satisfied: fsspec in c:\users\asus\anaconda3\lib\site-packages (from torch) (2023.10.0)
Requirement already satisfied: mkl<=2021.4.0,>=2021.1.1 in c:\users\asus\appdata\roaming\python\python311\site-packages (from torch) (2021.4.0)
Requirement already satisfied: intel-openmp==2021.* in c:\users\asus\appdata\roaming\python\python311\site-packages (from mkl<=2021.4.0,>=2021.1.1->torch) (2021.4.0)
Requirement already satisfied: tbb==2021.* in c:\users\asus\anaconda3\lib\site-packages (from mkl<=2021.4.0,>=2021.1.1->torch) (2021.13.0)
Requirement already satisfied: MarkupSafe>=2.0 in c:\users\asus\anaconda3\lib\site-packages (from jinja2->torch) (2.1.3)
Requirement already satisfied: mpmath>=0.19 in c:\users\asus\anaconda3\lib\site-packages (from sympy->torch) (1.3.0)
Note: you may need to restart the kernel to use updated packages.
In [7]:
pip install scikit-learn
Requirement already satisfied: scikit-learn in c:\users\asus\anaconda3\lib\site-packages (1.2.2)
Requirement already satisfied: numpy>=1.17.3 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn) (1.26.4)
Requirement already satisfied: scipy>=1.3.2 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn) (1.11.4)
Requirement already satisfied: joblib>=1.1.1 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn) (2.2.0)
Note: you may need to restart the kernel to use updated packages.
In [8]:
pip install lightgbm
Requirement already satisfied: lightgbm in c:\users\asus\anaconda3\lib\site-packages (4.4.0)
Requirement already satisfied: numpy>=1.17.0 in c:\users\asus\anaconda3\lib\site-packages (from lightgbm) (1.26.4)
Requirement already satisfied: scipy in c:\users\asus\anaconda3\lib\site-packages (from lightgbm) (1.11.4)
Note: you may need to restart the kernel to use updated packages.
In [9]:
pip install mlxtend
Requirement already satisfied: mlxtend in c:\users\asus\anaconda3\lib\site-packages (0.23.1)
Requirement already satisfied: scipy>=1.2.1 in c:\users\asus\anaconda3\lib\site-packages (from mlxtend) (1.11.4)
Requirement already satisfied: numpy>=1.16.2 in c:\users\asus\anaconda3\lib\site-packages (from mlxtend) (1.26.4)
Requirement already satisfied: pandas>=0.24.2 in c:\users\asus\anaconda3\lib\site-packages (from mlxtend) (2.2.2)
Requirement already satisfied: scikit-learn>=1.0.2 in c:\users\asus\anaconda3\lib\site-packages (from mlxtend) (1.2.2)
Requirement already satisfied: matplotlib>=3.0.0 in c:\users\asus\anaconda3\lib\site-packages (from mlxtend) (3.8.0)
Requirement already satisfied: joblib>=0.13.2 in c:\users\asus\anaconda3\lib\site-packages (from mlxtend) (1.2.0)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (4.25.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (23.1)
Requirement already satisfied: pillow>=6.2.0 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (10.2.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib>=3.0.0->mlxtend) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\asus\anaconda3\lib\site-packages (from pandas>=0.24.2->mlxtend) (2023.3.post1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\asus\anaconda3\lib\site-packages (from pandas>=0.24.2->mlxtend) (2023.3)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn>=1.0.2->mlxtend) (2.2.0)
Requirement already satisfied: six>=1.5 in c:\users\asus\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib>=3.0.0->mlxtend) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [10]:
pip install boruta
Requirement already satisfied: boruta in c:\users\asus\anaconda3\lib\site-packages (0.3)
Requirement already satisfied: numpy>=1.10.4 in c:\users\asus\anaconda3\lib\site-packages (from boruta) (1.26.4)
Requirement already satisfied: scikit-learn>=0.17.1 in c:\users\asus\anaconda3\lib\site-packages (from boruta) (1.2.2)
Requirement already satisfied: scipy>=0.17.0 in c:\users\asus\anaconda3\lib\site-packages (from boruta) (1.11.4)
Requirement already satisfied: joblib>=1.1.1 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn>=0.17.1->boruta) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn>=0.17.1->boruta) (2.2.0)
Note: you may need to restart the kernel to use updated packages.
In [11]:
pip install alibi
Requirement already satisfied: alibi in c:\users\asus\anaconda3\lib\site-packages (0.9.6)
Requirement already satisfied: numpy<2.0.0,>=1.16.2 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (1.26.4)
Requirement already satisfied: pandas<3.0.0,>=1.0.0 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (2.2.2)
Requirement already satisfied: scikit-learn<2.0.0,>=1.0.0 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (1.2.2)
Requirement already satisfied: spacy<4.0.0,>=2.0.0 in c:\users\asus\anaconda3\lib\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi) (3.7.5)
Requirement already satisfied: blis<0.8.0 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (0.7.11)
Requirement already satisfied: scikit-image<0.23,>=0.17.2 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (0.22.0)
Requirement already satisfied: requests<3.0.0,>=2.21.0 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (2.31.0)
Requirement already satisfied: Pillow<11.0,>=5.4.1 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (10.2.0)
Requirement already satisfied: attrs<24.0.0,>=19.2.0 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (23.1.0)
Requirement already satisfied: scipy<2.0.0,>=1.1.0 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (1.11.4)
Requirement already satisfied: matplotlib<4.0.0,>=3.0.0 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (3.8.0)
Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (4.9.0)
Requirement already satisfied: dill<0.4.0,>=0.3.0 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (0.3.8)
Requirement already satisfied: transformers<5.0.0,>=4.7.0 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (4.44.0)
Requirement already satisfied: tqdm<5.0.0,>=4.28.1 in c:\users\asus\anaconda3\lib\site-packages (from alibi) (4.65.0)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib<4.0.0,>=3.0.0->alibi) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib<4.0.0,>=3.0.0->alibi) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib<4.0.0,>=3.0.0->alibi) (4.25.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib<4.0.0,>=3.0.0->alibi) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib<4.0.0,>=3.0.0->alibi) (23.1)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib<4.0.0,>=3.0.0->alibi) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib<4.0.0,>=3.0.0->alibi) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\asus\anaconda3\lib\site-packages (from pandas<3.0.0,>=1.0.0->alibi) (2023.3.post1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\asus\anaconda3\lib\site-packages (from pandas<3.0.0,>=1.0.0->alibi) (2023.3)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\asus\anaconda3\lib\site-packages (from requests<3.0.0,>=2.21.0->alibi) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in c:\users\asus\anaconda3\lib\site-packages (from requests<3.0.0,>=2.21.0->alibi) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\asus\anaconda3\lib\site-packages (from requests<3.0.0,>=2.21.0->alibi) (2.0.7)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\asus\anaconda3\lib\site-packages (from requests<3.0.0,>=2.21.0->alibi) (2024.2.2)
Requirement already satisfied: networkx>=2.8 in c:\users\asus\anaconda3\lib\site-packages (from scikit-image<0.23,>=0.17.2->alibi) (3.1)
Requirement already satisfied: imageio>=2.27 in c:\users\asus\anaconda3\lib\site-packages (from scikit-image<0.23,>=0.17.2->alibi) (2.33.1)
Requirement already satisfied: tifffile>=2022.8.12 in c:\users\asus\anaconda3\lib\site-packages (from scikit-image<0.23,>=0.17.2->alibi) (2023.4.12)
Requirement already satisfied: lazy_loader>=0.3 in c:\users\asus\anaconda3\lib\site-packages (from scikit-image<0.23,>=0.17.2->alibi) (0.3)
Requirement already satisfied: joblib>=1.1.1 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn<2.0.0,>=1.0.0->alibi) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn<2.0.0,>=1.0.0->alibi) (2.2.0)
Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (3.0.12)
Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (1.0.5)
Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (1.0.10)
Requirement already satisfied: cymem<2.1.0,>=2.0.2 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (2.0.8)
Requirement already satisfied: preshed<3.1.0,>=3.0.2 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (3.0.9)
Requirement already satisfied: thinc<8.3.0,>=8.2.2 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (8.2.5)
Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (1.1.3)
Requirement already satisfied: srsly<3.0.0,>=2.4.3 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (2.4.8)
Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (2.0.10)
Requirement already satisfied: weasel<0.5.0,>=0.1.0 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (0.4.1)
Requirement already satisfied: typer<1.0.0,>=0.3.0 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (0.12.3)
Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (1.10.12)
Requirement already satisfied: jinja2 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (3.1.3)
Requirement already satisfied: setuptools in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (68.2.2)
Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in c:\users\asus\anaconda3\lib\site-packages (from spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (3.4.0)
Requirement already satisfied: spacy-lookups-data<1.1.0,>=1.0.3 in c:\users\asus\anaconda3\lib\site-packages (from spacy[lookups]<4.0.0,>=2.0.0->alibi) (1.0.5)
Requirement already satisfied: colorama in c:\users\asus\anaconda3\lib\site-packages (from tqdm<5.0.0,>=4.28.1->alibi) (0.4.6)
Requirement already satisfied: filelock in c:\users\asus\anaconda3\lib\site-packages (from transformers<5.0.0,>=4.7.0->alibi) (3.13.1)
Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in c:\users\asus\anaconda3\lib\site-packages (from transformers<5.0.0,>=4.7.0->alibi) (0.24.5)
Requirement already satisfied: pyyaml>=5.1 in c:\users\asus\anaconda3\lib\site-packages (from transformers<5.0.0,>=4.7.0->alibi) (6.0.1)
Requirement already satisfied: regex!=2019.12.17 in c:\users\asus\anaconda3\lib\site-packages (from transformers<5.0.0,>=4.7.0->alibi) (2023.10.3)
Requirement already satisfied: safetensors>=0.4.1 in c:\users\asus\anaconda3\lib\site-packages (from transformers<5.0.0,>=4.7.0->alibi) (0.4.4)
Requirement already satisfied: tokenizers<0.20,>=0.19 in c:\users\asus\anaconda3\lib\site-packages (from transformers<5.0.0,>=4.7.0->alibi) (0.19.1)
Requirement already satisfied: fsspec>=2023.5.0 in c:\users\asus\anaconda3\lib\site-packages (from huggingface-hub<1.0,>=0.23.2->transformers<5.0.0,>=4.7.0->alibi) (2023.10.0)
Requirement already satisfied: language-data>=1.2 in c:\users\asus\anaconda3\lib\site-packages (from langcodes<4.0.0,>=3.2.0->spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (1.2.0)
Requirement already satisfied: six>=1.5 in c:\users\asus\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib<4.0.0,>=3.0.0->alibi) (1.16.0)
Requirement already satisfied: confection<1.0.0,>=0.0.1 in c:\users\asus\anaconda3\lib\site-packages (from thinc<8.3.0,>=8.2.2->spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (0.1.5)
Requirement already satisfied: click>=8.0.0 in c:\users\asus\anaconda3\lib\site-packages (from typer<1.0.0,>=0.3.0->spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (8.1.7)
Requirement already satisfied: shellingham>=1.3.0 in c:\users\asus\anaconda3\lib\site-packages (from typer<1.0.0,>=0.3.0->spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (1.5.4)
Requirement already satisfied: rich>=10.11.0 in c:\users\asus\anaconda3\lib\site-packages (from typer<1.0.0,>=0.3.0->spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (13.3.5)
Requirement already satisfied: cloudpathlib<1.0.0,>=0.7.0 in c:\users\asus\anaconda3\lib\site-packages (from weasel<0.5.0,>=0.1.0->spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (0.18.1)
Requirement already satisfied: smart-open<8.0.0,>=5.2.1 in c:\users\asus\anaconda3\lib\site-packages (from weasel<0.5.0,>=0.1.0->spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (5.2.1)
Requirement already satisfied: MarkupSafe>=2.0 in c:\users\asus\anaconda3\lib\site-packages (from jinja2->spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (2.1.3)
Requirement already satisfied: marisa-trie>=0.7.7 in c:\users\asus\anaconda3\lib\site-packages (from language-data>=1.2->langcodes<4.0.0,>=3.2.0->spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (1.2.0)
Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in c:\users\asus\anaconda3\lib\site-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (2.2.0)
Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\users\asus\anaconda3\lib\site-packages (from rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (2.15.1)
Requirement already satisfied: mdurl~=0.1 in c:\users\asus\anaconda3\lib\site-packages (from markdown-it-py<3.0.0,>=2.2.0->rich>=10.11.0->typer<1.0.0,>=0.3.0->spacy<4.0.0,>=2.0.0->spacy[lookups]<4.0.0,>=2.0.0->alibi) (0.1.0)
Note: you may need to restart the kernel to use updated packages.
In [12]:
pip install lime
Requirement already satisfied: lime in c:\users\asus\anaconda3\lib\site-packages (0.2.0.1)
Requirement already satisfied: matplotlib in c:\users\asus\anaconda3\lib\site-packages (from lime) (3.8.0)
Requirement already satisfied: numpy in c:\users\asus\anaconda3\lib\site-packages (from lime) (1.26.4)
Requirement already satisfied: scipy in c:\users\asus\anaconda3\lib\site-packages (from lime) (1.11.4)
Requirement already satisfied: tqdm in c:\users\asus\anaconda3\lib\site-packages (from lime) (4.65.0)
Requirement already satisfied: scikit-learn>=0.18 in c:\users\asus\anaconda3\lib\site-packages (from lime) (1.2.2)
Requirement already satisfied: scikit-image>=0.12 in c:\users\asus\anaconda3\lib\site-packages (from lime) (0.22.0)
Requirement already satisfied: networkx>=2.8 in c:\users\asus\anaconda3\lib\site-packages (from scikit-image>=0.12->lime) (3.1)
Requirement already satisfied: pillow>=9.0.1 in c:\users\asus\anaconda3\lib\site-packages (from scikit-image>=0.12->lime) (10.2.0)
Requirement already satisfied: imageio>=2.27 in c:\users\asus\anaconda3\lib\site-packages (from scikit-image>=0.12->lime) (2.33.1)
Requirement already satisfied: tifffile>=2022.8.12 in c:\users\asus\anaconda3\lib\site-packages (from scikit-image>=0.12->lime) (2023.4.12)
Requirement already satisfied: packaging>=21 in c:\users\asus\anaconda3\lib\site-packages (from scikit-image>=0.12->lime) (23.1)
Requirement already satisfied: lazy_loader>=0.3 in c:\users\asus\anaconda3\lib\site-packages (from scikit-image>=0.12->lime) (0.3)
Requirement already satisfied: joblib>=1.1.1 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn>=0.18->lime) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn>=0.18->lime) (2.2.0)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib->lime) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib->lime) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib->lime) (4.25.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib->lime) (1.4.4)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib->lime) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib->lime) (2.8.2)
Requirement already satisfied: colorama in c:\users\asus\anaconda3\lib\site-packages (from tqdm->lime) (0.4.6)
Requirement already satisfied: six>=1.5 in c:\users\asus\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib->lime) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [13]:
pip install scikit-learn matplotlib
Requirement already satisfied: scikit-learn in c:\users\asus\anaconda3\lib\site-packages (1.2.2)
Requirement already satisfied: matplotlib in c:\users\asus\anaconda3\lib\site-packages (3.8.0)
Requirement already satisfied: numpy>=1.17.3 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn) (1.26.4)
Requirement already satisfied: scipy>=1.3.2 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn) (1.11.4)
Requirement already satisfied: joblib>=1.1.1 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\asus\anaconda3\lib\site-packages (from scikit-learn) (2.2.0)
Requirement already satisfied: contourpy>=1.0.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib) (1.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib) (0.11.0)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib) (4.25.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib) (1.4.4)
Requirement already satisfied: packaging>=20.0 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib) (23.1)
Requirement already satisfied: pillow>=6.2.0 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib) (10.2.0)
Requirement already satisfied: pyparsing>=2.3.1 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\asus\anaconda3\lib\site-packages (from matplotlib) (2.8.2)
Requirement already satisfied: six>=1.5 in c:\users\asus\anaconda3\lib\site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [14]:
pip install scikit-fuzzy
Requirement already satisfied: scikit-fuzzy in c:\users\asus\anaconda3\lib\site-packages (0.4.2)
Requirement already satisfied: numpy>=1.6.0 in c:\users\asus\anaconda3\lib\site-packages (from scikit-fuzzy) (1.26.4)
Requirement already satisfied: scipy>=0.9.0 in c:\users\asus\anaconda3\lib\site-packages (from scikit-fuzzy) (1.11.4)
Requirement already satisfied: networkx>=1.9.0 in c:\users\asus\anaconda3\lib\site-packages (from scikit-fuzzy) (3.1)
Note: you may need to restart the kernel to use updated packages.
In [15]:
pip install streamlit
Requirement already satisfied: streamlit in c:\users\asus\anaconda3\lib\site-packages (1.30.0)
Requirement already satisfied: altair<6,>=4.0 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (5.0.1)
Requirement already satisfied: blinker<2,>=1.0.0 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (1.6.2)
Requirement already satisfied: cachetools<6,>=4.0 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (4.2.2)
Requirement already satisfied: click<9,>=7.0 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (8.1.7)
Requirement already satisfied: importlib-metadata<8,>=1.4 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (7.0.1)
Requirement already satisfied: numpy<2,>=1.19.3 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (1.26.4)
Requirement already satisfied: packaging<24,>=16.8 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (23.1)
Requirement already satisfied: pandas<3,>=1.3.0 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (2.2.2)
Requirement already satisfied: pillow<11,>=7.1.0 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (10.2.0)
Requirement already satisfied: protobuf<5,>=3.20 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (3.20.3)
Requirement already satisfied: pyarrow>=6.0 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (14.0.2)
Requirement already satisfied: python-dateutil<3,>=2.7.3 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (2.8.2)
Requirement already satisfied: requests<3,>=2.27 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (2.31.0)
Requirement already satisfied: rich<14,>=10.14.0 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (13.3.5)
Requirement already satisfied: tenacity<9,>=8.1.0 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (8.2.2)
Requirement already satisfied: toml<2,>=0.10.1 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (0.10.2)
Requirement already satisfied: typing-extensions<5,>=4.3.0 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (4.9.0)
Requirement already satisfied: tzlocal<6,>=1.1 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (2.1)
Requirement already satisfied: validators<1,>=0.2 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (0.18.2)
Requirement already satisfied: gitpython!=3.1.19,<4,>=3.0.7 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (3.1.37)
Requirement already satisfied: pydeck<1,>=0.8.0b4 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (0.8.0)
Requirement already satisfied: tornado<7,>=6.0.3 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (6.3.3)
Requirement already satisfied: watchdog>=2.1.5 in c:\users\asus\anaconda3\lib\site-packages (from streamlit) (2.1.6)
Requirement already satisfied: jinja2 in c:\users\asus\anaconda3\lib\site-packages (from altair<6,>=4.0->streamlit) (3.1.3)
Requirement already satisfied: jsonschema>=3.0 in c:\users\asus\anaconda3\lib\site-packages (from altair<6,>=4.0->streamlit) (4.19.2)
Requirement already satisfied: toolz in c:\users\asus\anaconda3\lib\site-packages (from altair<6,>=4.0->streamlit) (0.12.0)
Requirement already satisfied: colorama in c:\users\asus\anaconda3\lib\site-packages (from click<9,>=7.0->streamlit) (0.4.6)
Requirement already satisfied: gitdb<5,>=4.0.1 in c:\users\asus\anaconda3\lib\site-packages (from gitpython!=3.1.19,<4,>=3.0.7->streamlit) (4.0.7)
Requirement already satisfied: zipp>=0.5 in c:\users\asus\anaconda3\lib\site-packages (from importlib-metadata<8,>=1.4->streamlit) (3.17.0)
Requirement already satisfied: pytz>=2020.1 in c:\users\asus\anaconda3\lib\site-packages (from pandas<3,>=1.3.0->streamlit) (2023.3.post1)
Requirement already satisfied: tzdata>=2022.7 in c:\users\asus\anaconda3\lib\site-packages (from pandas<3,>=1.3.0->streamlit) (2023.3)
Requirement already satisfied: six>=1.5 in c:\users\asus\anaconda3\lib\site-packages (from python-dateutil<3,>=2.7.3->streamlit) (1.16.0)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\users\asus\anaconda3\lib\site-packages (from requests<3,>=2.27->streamlit) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in c:\users\asus\anaconda3\lib\site-packages (from requests<3,>=2.27->streamlit) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\users\asus\anaconda3\lib\site-packages (from requests<3,>=2.27->streamlit) (2.0.7)
Requirement already satisfied: certifi>=2017.4.17 in c:\users\asus\anaconda3\lib\site-packages (from requests<3,>=2.27->streamlit) (2024.2.2)
Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in c:\users\asus\anaconda3\lib\site-packages (from rich<14,>=10.14.0->streamlit) (2.2.0)
Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\users\asus\anaconda3\lib\site-packages (from rich<14,>=10.14.0->streamlit) (2.15.1)
Requirement already satisfied: decorator>=3.4.0 in c:\users\asus\anaconda3\lib\site-packages (from validators<1,>=0.2->streamlit) (5.1.1)
Requirement already satisfied: smmap<5,>=3.0.1 in c:\users\asus\anaconda3\lib\site-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit) (4.0.0)
Requirement already satisfied: MarkupSafe>=2.0 in c:\users\asus\anaconda3\lib\site-packages (from jinja2->altair<6,>=4.0->streamlit) (2.1.3)
Requirement already satisfied: attrs>=22.2.0 in c:\users\asus\anaconda3\lib\site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (23.1.0)
Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\users\asus\anaconda3\lib\site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (2023.7.1)
Requirement already satisfied: referencing>=0.28.4 in c:\users\asus\anaconda3\lib\site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (0.30.2)
Requirement already satisfied: rpds-py>=0.7.1 in c:\users\asus\anaconda3\lib\site-packages (from jsonschema>=3.0->altair<6,>=4.0->streamlit) (0.10.6)
Requirement already satisfied: mdurl~=0.1 in c:\users\asus\anaconda3\lib\site-packages (from markdown-it-py<3.0.0,>=2.2.0->rich<14,>=10.14.0->streamlit) (0.1.0)
Note: you may need to restart the kernel to use updated packages.
In [16]:
# All imports consolidated: stdlib -> third-party, duplicates removed.
# (The original cell imported IsolationForest, PCA, SVC, LogisticRegression,
# GradientBoostingClassifier, DecisionTreeClassifier, BaggingClassifier, RFE,
# xgboost, tensorflow and matplotlib two or three times each.)

# --- Standard library ---
import json
import os
import time
import urllib.request
import warnings
from itertools import product

# --- Core data / plotting ---
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# --- scikit-learn ---
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.decomposition import PCA
from sklearn.ensemble import (
    AdaBoostClassifier,
    BaggingClassifier,
    GradientBoostingClassifier,
    IsolationForest,
    RandomForestClassifier,
    StackingClassifier,
    VotingClassifier,
)
from sklearn.feature_selection import RFE, SelectKBest, f_classif
from sklearn.inspection import PartialDependenceDisplay, permutation_importance
from sklearn.linear_model import LassoCV, LogisticRegression
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    adjusted_rand_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import (
    GridSearchCV,
    RandomizedSearchCV,
    StratifiedShuffleSplit,
    train_test_split,
)
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# --- Other ML / boosting libraries ---
import lightgbm as lgb
import xgboost as xgb
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from pytorch_tabnet.tab_model import TabNetClassifier

# --- Deep learning ---
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical

# --- Explainability ---
import shap
import lime
import lime.lime_tabular
from lime.lime_tabular import LimeTabularExplainer
from alibi.explainers import AnchorTabular
from interpret import show
from interpret.glassbox import ExplainableBoostingClassifier
import dice_ml
from dice_ml import Dice
from dice_ml.utils import helpers

# --- Fuzzy logic ---
import skfuzzy.control as ctrl

# --- Dashboards / apps ---
import plotly.express as px
import streamlit as st
import streamlit.components.v1 as components
import dash_bootstrap_components as dbc
from dash import dcc, html
from dash.dependencies import Input, Output
from jupyter_dash import JupyterDash

# Silence warning noise in notebook output (deliberate global side effect,
# kept from the original cell).
warnings.filterwarnings('ignore')
WARNING:tensorflow:From C:\Users\ASUS\anaconda3\Lib\site-packages\alibi\explainers\cem.py:35: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

Data Pre-processing¶

In [17]:
# Load the Pima Indians Diabetes dataset (768 rows x 9 columns per Out[17]).
# NOTE(review): hardcoded absolute Windows path — not portable; consider a
# configurable DATA_DIR (e.g. pathlib.Path) instead.
diabetes_path = 'C://Users/ASUS/Desktop/Master Thesis_Lung Cancer/Diabetes dataset/diabetes.csv'
diabetes= pd.read_csv(diabetes_path)
diabetes
Out[17]:
Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
0 6 148 72 35 0 33.6 0.627 50 1
1 1 85 66 29 0 26.6 0.351 31 0
2 8 183 64 0 0 23.3 0.672 32 1
3 1 89 66 23 94 28.1 0.167 21 0
4 0 137 40 35 168 43.1 2.288 33 1
... ... ... ... ... ... ... ... ... ...
763 10 101 76 48 180 32.9 0.171 63 0
764 2 122 70 27 0 36.8 0.340 27 0
765 5 121 72 23 112 26.2 0.245 30 0
766 1 126 60 0 0 30.1 0.349 47 1
767 1 93 70 31 0 30.4 0.315 23 0

768 rows × 9 columns

In [18]:
# Column dtypes and non-null counts: all 768 rows are non-null numeric.
diabetes.info()

# Per-column minimums (cell output). Minimums of 0 for Glucose, BloodPressure,
# SkinThickness, Insulin and BMI suggest 0 may be a missing-value placeholder
# in this dataset — NOTE(review): consider treating those zeros as NaN.
diabetes.min()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB
Out[18]:
Pregnancies                  0.000
Glucose                      0.000
BloodPressure                0.000
SkinThickness                0.000
Insulin                      0.000
BMI                          0.000
DiabetesPedigreeFunction     0.078
Age                         21.000
Outcome                      0.000
dtype: float64
In [19]:
# Per-column maximums — quick sanity check of value ranges.
diabetes.max()
Out[19]:
Pregnancies                  17.00
Glucose                     199.00
BloodPressure               122.00
SkinThickness                99.00
Insulin                     846.00
BMI                          67.10
DiabetesPedigreeFunction      2.42
Age                          81.00
Outcome                       1.00
dtype: float64

Exploratory Data Analysis¶

Summary Statistics¶

In [20]:
# Summary statistics (count / mean / std / quartiles / min / max) per column.
print("Basic statistics:\n")
diabetes.describe()
Basic statistics:

Out[20]:
Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
count 768.000000 768.000000 768.000000 768.000000 768.000000 768.000000 768.000000 768.000000 768.000000
mean 3.845052 120.894531 69.105469 20.536458 79.799479 31.992578 0.471876 33.240885 0.348958
std 3.369578 31.972618 19.355807 15.952218 115.244002 7.884160 0.331329 11.760232 0.476951
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.078000 21.000000 0.000000
25% 1.000000 99.000000 62.000000 0.000000 0.000000 27.300000 0.243750 24.000000 0.000000
50% 3.000000 117.000000 72.000000 23.000000 30.500000 32.000000 0.372500 29.000000 0.000000
75% 6.000000 140.250000 80.000000 32.000000 127.250000 36.600000 0.626250 41.000000 1.000000
max 17.000000 199.000000 122.000000 99.000000 846.000000 67.100000 2.420000 81.000000 1.000000
In [21]:
# Class balance of the target: 500 negatives vs 268 positives.
print("Distribution of target variable 'diabetes stages':\n", diabetes['Outcome'].value_counts())

# Bar chart of the class counts, using the explicit Axes interface.
plt.figure(figsize=(8, 6))
ax = sns.countplot(x='Outcome', data=diabetes, palette='viridis')
ax.set_title('Distribution of Diabetes Outcome')
ax.set_xlabel('Diabetes Outcome (0 = No, 1 = Yes)')
ax.set_ylabel('Count')
plt.show()
Distribution of target variable 'diabetes stages':
 Outcome
0    500
1    268
Name: count, dtype: int64
No description has been provided for this image
In [22]:
# Per-column histograms (30 bins each) on a 5x5 grid.
# DataFrame.hist creates its own figure from the `figsize` argument, so the
# original stray `plt.figure(figsize=(10, 6))` only produced the empty
# "<Figure size 1000x600 with 0 Axes>" artifact seen in the output — removed.
diabetes.hist(bins=30, figsize=(20, 15), layout=(5, 5))
plt.show()
<Figure size 1000x600 with 0 Axes>
No description has been provided for this image

Correlation Analysis¶

In [23]:
# Pairwise Pearson correlations; Glucose shows the strongest association
# with Outcome (~0.47), followed by BMI (~0.29) — see Out[23].
diabetes.corr()
Out[23]:
Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
Pregnancies 1.000000 0.129459 0.141282 -0.081672 -0.073535 0.017683 -0.033523 0.544341 0.221898
Glucose 0.129459 1.000000 0.152590 0.057328 0.331357 0.221071 0.137337 0.263514 0.466581
BloodPressure 0.141282 0.152590 1.000000 0.207371 0.088933 0.281805 0.041265 0.239528 0.065068
SkinThickness -0.081672 0.057328 0.207371 1.000000 0.436783 0.392573 0.183928 -0.113970 0.074752
Insulin -0.073535 0.331357 0.088933 0.436783 1.000000 0.197859 0.185071 -0.042163 0.130548
BMI 0.017683 0.221071 0.281805 0.392573 0.197859 1.000000 0.140647 0.036242 0.292695
DiabetesPedigreeFunction -0.033523 0.137337 0.041265 0.183928 0.185071 0.140647 1.000000 0.033561 0.173844
Age 0.544341 0.263514 0.239528 -0.113970 -0.042163 0.036242 0.033561 1.000000 0.238356
Outcome 0.221898 0.466581 0.065068 0.074752 0.130548 0.292695 0.173844 0.238356 1.000000
In [24]:
# Heatmap of the correlation matrix, annotated to two decimal places.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(diabetes.corr(), annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Correlation Matrix')
plt.show()
No description has been provided for this image

Handling Missing Values & Duplicates¶

In [25]:
# Count NaN entries per column (all zero for this dataset, per the output).
# Note this does NOT catch the 0-valued placeholders in Glucose/Insulin etc.
missing_values = diabetes.isnull().sum()
print("Missing values before handling:\n", missing_values)
Missing values before handling:
 Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64
In [26]:
# Count fully-duplicated rows (zero here, so nothing needs dropping).
numof_duplicates = diabetes.duplicated().sum()
print(f"Number of duplicate rows: {numof_duplicates}")
Number of duplicate rows: 0

Outlier Detection¶

In [27]:
# Box plots of every column to visualise spread and flag outliers.
fig, ax = plt.subplots(figsize=(15, 10))
sns.boxplot(data=diabetes, ax=ax)
ax.tick_params(axis='x', rotation=90)
plt.show()
No description has been provided for this image
In [28]:
# Remove outliers with the interquartile-range (IQR) rule: a row is dropped
# when ANY of its values falls outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR].
Q1 = diabetes.quantile(0.25)
Q3 = diabetes.quantile(0.75)
IQR = Q3 - Q1

lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
outlier_mask = ((diabetes < lower_bound) | (diabetes > upper_bound)).any(axis=1)

# .copy() materialises an independent frame so that later column assignments
# (e.g. the engineered interaction feature) do not trigger
# pandas' SettingWithCopyWarning on a view of the filtered data.
diabetes = diabetes[~outlier_mask].copy()
In [29]:
# Re-draw the box plots to confirm the IQR filtering removed the extremes.
fig, ax = plt.subplots(figsize=(15, 10))
sns.boxplot(data=diabetes, ax=ax)
ax.tick_params(axis='x', rotation=90)
plt.show()
No description has been provided for this image
In [30]:
# Display the outlier-filtered frame (639 of the original 768 rows remain).
diabetes
Out[30]:
Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
0 6 148 72 35 0 33.6 0.627 50 1
1 1 85 66 29 0 26.6 0.351 31 0
2 8 183 64 0 0 23.3 0.672 32 1
3 1 89 66 23 94 28.1 0.167 21 0
5 5 116 74 0 0 25.6 0.201 30 0
... ... ... ... ... ... ... ... ... ...
763 10 101 76 48 180 32.9 0.171 63 0
764 2 122 70 27 0 36.8 0.340 27 0
765 5 121 72 23 112 26.2 0.245 30 0
766 1 126 60 0 0 30.1 0.349 47 1
767 1 93 70 31 0 30.4 0.315 23 0

639 rows × 9 columns

Feature Engineering¶

In [31]:
# Engineer an interaction feature: Glucose x BMI.
# .assign() returns a brand-new frame, which avoids the SettingWithCopyWarning
# that a direct column assignment can raise on a frame produced by the
# boolean-mask filtering in the outlier-removal step.
diabetes = diabetes.assign(
    glucose_bmi_interaction=diabetes['Glucose'] * diabetes['BMI']
)
diabetes
Out[31]:
Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome glucose_bmi_interaction
0 6 148 72 35 0 33.6 0.627 50 1 4972.8
1 1 85 66 29 0 26.6 0.351 31 0 2261.0
2 8 183 64 0 0 23.3 0.672 32 1 4263.9
3 1 89 66 23 94 28.1 0.167 21 0 2500.9
5 5 116 74 0 0 25.6 0.201 30 0 2969.6
... ... ... ... ... ... ... ... ... ... ...
763 10 101 76 48 180 32.9 0.171 63 0 3322.9
764 2 122 70 27 0 36.8 0.340 27 0 4489.6
765 5 121 72 23 112 26.2 0.245 30 0 3170.2
766 1 126 60 0 0 30.1 0.349 47 1 3792.6
767 1 93 70 31 0 30.4 0.315 23 0 2827.2

639 rows × 10 columns

Feature Scaling¶

In [32]:
# Standardise every predictor (zero mean, unit variance) and persist the result.
# NOTE(review): the scaler is fit on the FULL dataset before the train/test
# split below, so test-set statistics leak into the scaling — standard
# practice is to fit the scaler on the training split only.
features = diabetes.drop('Outcome', axis=1)
target = diabetes['Outcome']

# Applying StandardScaler to all predictor columns ('Outcome' excluded).
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Overwrite the original feature columns with their scaled values.
diabetes[features.columns] = scaled_features


# NOTE(review): hardcoded absolute Windows path — prefer a configurable
# DATA_DIR / relative path so the notebook runs on other machines.
scaled_file_path = 'C://Users/ASUS/Desktop/Master Thesis_Lung Cancer/Diabetes dataset/scaled_diabetes_dataset.csv'
diabetes.to_csv(scaled_file_path, index=False)
In [33]:
# Inspect the frame after scaling; 'Outcome' was excluded and is unchanged.
diabetes
Out[33]:
Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome glucose_bmi_interaction
0 0.673824 0.991351 -0.010626 0.941847 -0.829247 0.247974 0.788892 1.561132 1 0.834799
1 -0.860651 -1.170674 -0.539736 0.550406 -0.829247 -0.840853 -0.311759 -0.154912 0 -1.178473
2 1.287615 2.192476 -0.716106 -1.341558 -0.829247 -1.354158 0.968346 -0.064594 1 0.308503
3 -0.860651 -1.033403 -0.539736 0.158965 0.353035 -0.607533 -1.045526 -1.058093 0 -1.000369
5 0.366929 -0.106820 0.165744 -1.341558 -0.829247 -0.996400 -0.909939 -0.245230 0 -0.652400
... ... ... ... ... ... ... ... ... ... ...
763 1.901405 -0.621588 0.342113 1.789969 1.434699 0.139091 -1.029575 2.735268 0 -0.390106
764 -0.553756 0.099087 -0.186996 0.419926 -0.829247 0.745724 -0.355625 -0.516184 0 0.476066
765 0.366929 0.064769 -0.010626 0.158965 0.579430 -0.903072 -0.734473 -0.245230 0 -0.503473
766 -0.860651 0.236358 -1.068846 -1.341558 -0.829247 -0.296440 -0.319735 1.290178 1 -0.041395
767 -0.860651 -0.896131 -0.186996 0.680887 -0.829247 -0.249776 -0.455322 -0.877457 0 -0.758120

639 rows × 10 columns

Data Balancing Using SMOTE¶

In [34]:
# Class balance before resampling: 439 non-diabetic (0) vs 200 diabetic (1).
print(diabetes['Outcome'].value_counts())
Outcome
0    439
1    200
Name: count, dtype: int64
In [35]:
# Bar chart of the class distribution before SMOTE resampling.
plt.figure(figsize=(8, 6))
# seaborn 0.13+ deprecates passing `palette` without `hue`; assigning hue to
# the x variable (and suppressing the redundant legend) keeps the same colours
# without the FutureWarning.
sns.countplot(x='Outcome', hue='Outcome', data=diabetes, palette='viridis', legend=False)
plt.title('Distribution of Diabetes Outcome')
plt.xlabel('Diabetes Outcome (0 = Non-Diabetic, 1 = Diabetic)')
plt.ylabel('Count')
plt.show()
No description has been provided for this image
In [36]:
# features and target variable
X = diabetes.drop('Outcome', axis=1)
y = diabetes['Outcome']

# Applying SMOTE to the entire dataset.
# NOTE(review): oversampling BEFORE the train/test split lets synthetic
# samples interpolated from (future) test rows enter the training set,
# which inflates the reported scores — the usual recommendation is to
# apply SMOTE to the training split only.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Rebuild a single balanced frame; `diabetes` now refers to the resampled data
# for every cell below.
diabetes_resampled = pd.DataFrame(X_resampled, columns=X.columns)
diabetes_resampled['Outcome'] = y_resampled
diabetes = diabetes_resampled

print(diabetes['Outcome'].value_counts())
Outcome
1    439
0    439
Name: count, dtype: int64
In [37]:
# Class distribution after SMOTE — both classes now have 439 rows.
plt.figure(figsize=(8, 6))
# hue mirrors x so the viridis palette applies without seaborn 0.13+'s
# "palette without hue" deprecation warning; the legend would be redundant.
sns.countplot(x='Outcome', hue='Outcome', data=diabetes, palette='viridis', legend=False)
plt.title('Distribution of Diabetes Outcome')
# Fixed typo: the label previously opened with a doubled parenthesis "((".
plt.xlabel('Diabetes Outcome (0 = Non-Diabetic, 1 = Diabetic)')
plt.ylabel('Count')
plt.show()
No description has been provided for this image

Feature Selection¶

In [38]:
# Recursive feature elimination: rank features with a random forest and
# keep the five most informative ones.
model = RandomForestClassifier(random_state=42)

n_features_to_select = 5
rfe = RFE(
    estimator=model,
    n_features_to_select=n_features_to_select,
    step=1,  # eliminate one feature per round
)
rfe.fit(X_resampled, y_resampled)

# Boolean support mask maps back to the original column names.
selected_features = X.columns[rfe.support_]
print("Selected features:", selected_features)
Selected features: Index(['Glucose', 'BMI', 'DiabetesPedigreeFunction', 'Age',
       'glucose_bmi_interaction'],
      dtype='object')
In [39]:
# Removing irrelevant columns.
# NOTE(review): RFE above selected Glucose, BMI, DiabetesPedigreeFunction,
# Age and glucose_bmi_interaction — 'Pregnancies' was NOT selected, yet it is
# retained here because only three columns are dropped. Confirm this is
# intentional.
columns_to_drop = ['Insulin', 'SkinThickness', 'BloodPressure']
diabetes = diabetes.drop(columns=columns_to_drop)
diabetes
Out[39]:
Pregnancies Glucose BMI DiabetesPedigreeFunction Age glucose_bmi_interaction Outcome
0 0.673824 0.991351 0.247974 0.788892 1.561132 0.834799 1
1 -0.860651 -1.170674 -0.840853 -0.311759 -0.154912 -1.178473 0
2 1.287615 2.192476 -1.354158 0.968346 -0.064594 0.308503 1
3 -0.860651 -1.033403 -0.607533 -1.045526 -1.058093 -1.000369 0
4 0.366929 -0.106820 -0.996400 -0.909939 -0.245230 -0.652400 0
... ... ... ... ... ... ... ...
873 0.944034 -0.173405 0.108662 -0.662370 0.849384 -0.087750 1
874 0.190319 0.926603 0.548500 1.749779 0.075142 0.965831 1
875 1.305768 -0.288115 -0.011136 -0.516894 0.908705 -0.233628 1
876 0.081558 1.151113 0.147725 -0.252982 -0.393649 0.859111 1
877 -0.886275 0.734000 0.275308 0.000557 -0.632809 0.685970 1

878 rows × 7 columns

Data Splitting¶

In [40]:
X = diabetes.drop('Outcome', axis=1)
y = diabetes['Outcome']

# Splitting the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
In [41]:
# Sanity-check the split sizes (702 train / 176 test rows, 6 features each).
for label, split in (("Training set size:", X_train), ("Test set size:", X_test)):
    print(label, split.shape)
Training set size: (702, 6)
Test set size: (176, 6)

SVM¶

In [42]:
# Grid-search the SVM hyperparameters with 5-fold cross-validated accuracy.
hyperparameter_grid = {
    'C': [0.1, 1, 10, 100],              # regularisation strength
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto'],
}

# probability=True enables predict_proba, needed later for the ROC curve.
svmModel_classifier = SVC(probability=True)

grid = GridSearchCV(svmModel_classifier, hyperparameter_grid, cv=5, scoring='accuracy')
grid.fit(X_train, y_train)

best_params = grid.best_params_
print('Best parameters found: {}'.format(best_params))
Best parameters found: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
In [43]:
# Refit the SVM with the tuned hyperparameters and time training/prediction.
# time.perf_counter() is a monotonic, high-resolution timer intended for
# measuring elapsed intervals; time.time() is wall-clock with coarse
# resolution on some platforms and can jump if the system clock changes.
start_time = time.perf_counter()

# Training the model with the grid-search winners.
final_model = SVC(
    C=best_params['C'],
    kernel=best_params['kernel'],
    gamma=best_params['gamma'],
    probability=True,   # needed for predict_proba / ROC below
    random_state=42,
)
final_model.fit(X_train, y_train)
end_time = time.perf_counter()
training_time_svm = end_time - start_time
print(f"SVM Training Time: {training_time_svm:.4f} seconds")

start_time = time.perf_counter()
# Making predictions on the held-out test set.
y_pred = final_model.predict(X_test)
end_time = time.perf_counter()
prediction_time_svm = end_time - start_time
print(f"SVM prediction Time: {prediction_time_svm:.4f} seconds")
SVM Training Time: 0.0886 seconds
SVM prediction Time: 0.0020 seconds
In [44]:
# Evaluate the tuned SVM on the test set: accuracy, per-class report,
# confusion matrix, precision/recall/F1, and the ROC curve with its AUC.

# Overall test-set accuracy.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Confusion matrix (rows = true class, columns = predicted class).
conf_matrix = confusion_matrix(y_test, y_pred)
print('Confusion Matrix-SVM')
print(conf_matrix)

disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix)
disp.plot(cmap='Blues')
plt.title('Confusion Matrix-SVM')
plt.show()

# Precision, recall, and F1 for the positive (diabetic) class.
precision = precision_score(y_test, y_pred, average='binary')
recall = recall_score(y_test, y_pred, average='binary')
f1 = f1_score(y_test, y_pred, average='binary')

print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

# Probability of the positive class, required for the ROC curve.
y_pred_proba = final_model.predict_proba(X_test)[:, 1]

# False/true positive rates at every decision threshold.
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)

# Area under the ROC curve.
# NOTE(review): the name `auc` shadows sklearn.metrics.auc if that function
# was imported — consider renaming to auc_svm for consistency with the
# other model cells.
auc = roc_auc_score(y_test, y_pred_proba)

# ROC curve with the diagonal chance line for reference.
plt.figure(figsize=(10, 6))
plt.plot(fpr, tpr, label=f'ROC curve (AUC = {auc:.2f})')
plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line representing random chance
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

print(f'Area Under the Curve (AUC): {auc:.4f}')
Accuracy: 0.8125
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.80      0.81        88
           1       0.80      0.83      0.82        88

    accuracy                           0.81       176
   macro avg       0.81      0.81      0.81       176
weighted avg       0.81      0.81      0.81       176

Confusion Matrix-SVM
[[70 18]
 [15 73]]
No description has been provided for this image
Precision: 0.8022
Recall: 0.8295
F1 Score: 0.8156
No description has been provided for this image
Area Under the Curve (AUC): 0.8280

Post-hoc Explanation (XAI)¶

Permutation Feature Importance¶

In [45]:
# Permutation importance for the SVM: mean accuracy drop when each feature
# is shuffled, averaged over 10 repeats.
result = permutation_importance(
    final_model, X_test, y_test,
    n_repeats=10, random_state=42, scoring='accuracy',
)
perm_importances = result.importances_mean
perm_importances_std = result.importances_std

features = X_test.columns

# Horizontal bars sorted ascending (most important on top), with std error bars.
indices = np.argsort(perm_importances)
positions = range(len(indices))
plt.figure(figsize=(10, 6))
plt.title('Permutation Feature Importance for SVM')
plt.barh(positions, perm_importances[indices], xerr=perm_importances_std[indices], align='center')
plt.yticks(positions, features[indices])
plt.xlabel('Importance')
plt.show()
No description has been provided for this image

LIME¶

In [46]:
# Build a LIME tabular explainer over the training data distribution.
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train.values,
    mode='classification',
    feature_names=X_train.columns,
    class_names=['Non-Diabetic', 'Diabetic'],
    discretize_continuous=True,
)

# Explain a single test instance (the first one) for the tuned SVM.
instance_idx = 0
exp = explainer.explain_instance(
    X_test.values[instance_idx],
    final_model.predict_proba,
    num_features=10,
)

# Render the interactive explanation inline.
exp.show_in_notebook(show_table=True, show_all=False)
In [47]:
# Render the same LIME explanation as a static matplotlib figure.
exp.as_pyplot_figure()
plt.show()
No description has been provided for this image

Random Forest¶

In [48]:
# Exhaustive hyperparameter search for the random forest
# (3 * 4 * 3 * 3 * 2 = 216 candidates, 5-fold CV => 1080 fits).
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],       # None = grow until pure
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False],
}

model_rf = RandomForestClassifier(random_state=42)

# n_jobs=-1 parallelises across all cores; verbose=2 logs each fit.
grid_search = GridSearchCV(estimator=model_rf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

print('Best parameters found: {}'.format(grid_search.best_params_))
Fitting 5 folds for each of 216 candidates, totalling 1080 fits
Best parameters found: {'bootstrap': False, 'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
In [49]:
# Evaluate the tuned random forest: prediction timing, accuracy, per-class
# report, confusion matrix, precision/recall/F1, and the ROC/AUC curve.
# perf_counter() is a monotonic, high-resolution timer — better suited than
# time.time() for measuring short elapsed intervals.
start_time = time.perf_counter()
# Predict with the best estimator found by the grid search.
y_pred = grid_search.predict(X_test)
end_time = time.perf_counter()
prediction_time_rf = end_time - start_time
print(f"Random Forest prediction Time: {prediction_time_rf:.4f} seconds")

accuracy_rf = accuracy_score(y_test, y_pred)
print(f'Accuracy with tuned Random Forest: {accuracy_rf:.4f}')
print('Classification Report:')
print(classification_report(y_test, y_pred))

# Confusion matrix (rows = true class, columns = predicted class).
conf_matrix_rf = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix_rf)

disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix_rf)
disp.plot(cmap='Blues')
plt.title('Confusion Matrix-Random Forest')
plt.show()

# Precision, recall, and F1 for the positive (diabetic) class.
precision = precision_score(y_test, y_pred, average='binary')
recall = recall_score(y_test, y_pred, average='binary')
f1 = f1_score(y_test, y_pred, average='binary')

print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

# Positive-class probability, required for the ROC curve.
y_pred_proba_rf = grid_search.predict_proba(X_test)[:, 1]

# False/true positive rates at every threshold, and the area under the curve.
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_pred_proba_rf)
auc_rf = roc_auc_score(y_test, y_pred_proba_rf)

# ROC curve with the diagonal chance line for reference.
plt.figure(figsize=(10, 6))
plt.plot(fpr_rf, tpr_rf, label=f'ROC curve (AUC = {auc_rf:.2f})')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

# Added for consistency: every other model cell prints its AUC value.
print(f'Area Under the Curve (AUC): {auc_rf:.4f}')
Random Forest prediction Time: 0.0225 seconds
Accuracy with tuned Random Forest: 0.8068
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.82      0.81        88
           1       0.81      0.80      0.80        88

    accuracy                           0.81       176
   macro avg       0.81      0.81      0.81       176
weighted avg       0.81      0.81      0.81       176

Confusion Matrix:
[[72 16]
 [18 70]]
No description has been provided for this image
Precision: 0.8140
Recall: 0.7955
F1 Score: 0.8046
No description has been provided for this image

KNN¶

In [50]:
# k-nearest-neighbours baseline with k=5.
knn = KNeighborsClassifier(n_neighbors=5)

# Training the model (for KNN, fitting stores the training samples).
knn.fit(X_train, y_train)

# perf_counter() is monotonic and high-resolution; time.time() has coarse
# (~15 ms) resolution on Windows, which quantises short prediction timings.
start_time = time.perf_counter()
# Model predictions on the held-out test set.
y_pred = knn.predict(X_test)
end_time = time.perf_counter()
prediction_time_knn = end_time - start_time
print(f"knn prediction Time: {prediction_time_knn:.4f} seconds")
knn prediction Time: 0.0200 seconds
In [51]:
# Evaluate KNN on the test set: accuracy, per-class report, confusion matrix,
# precision/recall/F1, and the ROC curve with its AUC.

# Overall test-set accuracy.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')
print("Classification Report:")
print(classification_report(y_test, y_pred))


# Confusion matrix (rows = true class, columns = predicted class).
conf_matrix_knn = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix_knn)


disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix_knn)
disp.plot(cmap='Blues')
plt.title('Confusion Matrix - KNN')
plt.show()

# Precision, recall, and F1 for the positive (diabetic) class.
precision_knn = precision_score(y_test, y_pred, average='binary')
recall_knn = recall_score(y_test, y_pred, average='binary')
f1_knn = f1_score(y_test, y_pred, average='binary')

print(f'Precision: {precision_knn:.4f}')
print(f'Recall: {recall_knn:.4f}')
print(f'F1 Score: {f1_knn:.4f}')

# Positive-class probability, required for the ROC curve.
y_pred_proba_knn = knn.predict_proba(X_test)[:, 1]

# False/true positive rates at every decision threshold.
fpr_knn, tpr_knn, _ = roc_curve(y_test, y_pred_proba_knn)

# Area under the ROC curve.
auc_knn = roc_auc_score(y_test, y_pred_proba_knn)

# ROC curve with the diagonal chance line for reference.
plt.figure(figsize=(10, 6))
plt.plot(fpr_knn, tpr_knn, label=f'ROC curve (AUC = {auc_knn:.2f})')
plt.plot([0, 1], [0, 1], 'k--')  
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve - KNN')
plt.legend(loc='lower right')
plt.show()

print(f'Area Under the Curve (AUC): {auc_knn:.4f}')
Accuracy: 0.8352
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.83      0.83        88
           1       0.83      0.84      0.84        88

    accuracy                           0.84       176
   macro avg       0.84      0.84      0.84       176
weighted avg       0.84      0.84      0.84       176

Confusion Matrix:
[[73 15]
 [14 74]]
No description has been provided for this image
Precision: 0.8315
Recall: 0.8409
F1 Score: 0.8362
No description has been provided for this image
Area Under the Curve (AUC): 0.8461

Post-hoc Explanation (XAI)¶

In [52]:
# Permutation importance for KNN: mean accuracy drop over 30 shuffles of
# each feature column.
perm_importance = permutation_importance(
    knn, X_test, y_test,
    n_repeats=30, random_state=42, scoring='accuracy',
)

# Feature names and their importance scores (mean and spread).
feature_names = X_test.columns
importances = perm_importance.importances_mean
std = perm_importance.importances_std

# Plot ascending so the most important feature ends up on top.
sorted_idx = np.argsort(importances)
positions = range(len(sorted_idx))
plt.figure(figsize=(10, 6))
plt.barh(positions, importances[sorted_idx], xerr=std[sorted_idx])
plt.yticks(positions, feature_names[sorted_idx])
plt.xlabel('Permutation Importance')
plt.title('Permutation Feature Importance for KNN')
plt.show()
No description has been provided for this image
In [53]:
# LIME tabular explainer fitted on the training distribution, reused for KNN.
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train.values,
    mode='classification',
    feature_names=X_train.columns,
    class_names=['Non-Diabetic', 'Diabetic'],
    discretize_continuous=True,
)

# Explain the first test instance using KNN's class probabilities.
instance_idx = 0
exp = explainer.explain_instance(
    X_test.values[instance_idx],
    knn.predict_proba,
    num_features=10,
)

# Render the interactive explanation inline.
exp.show_in_notebook(show_table=True, show_all=False)
In [54]:
# Render the same LIME explanation as a static matplotlib figure.
exp.as_pyplot_figure()
plt.show()
No description has been provided for this image

XGBoost¶

In [55]:
# XGBoost baseline with default hyperparameters.
model_xgb = xgb.XGBClassifier(random_state=42)

# Model training.
model_xgb.fit(X_train, y_train)

# time.time() previously reported "0.0000 seconds" here — its resolution is
# too coarse for sub-millisecond work. perf_counter() is monotonic and
# high-resolution, so the measured interval is meaningful.
start_time = time.perf_counter()
y_pred = model_xgb.predict(X_test)
end_time = time.perf_counter()
prediction_time_xgboost = end_time - start_time
print(f"XGBoost prediction Time: {prediction_time_xgboost:.4f} seconds")
XGBoost prediction Time: 0.0000 seconds
In [56]:
# Evaluate XGBoost on the test set: accuracy, per-class report, confusion
# matrix, precision/recall/F1, and the ROC curve with its AUC.

# Overall test-set accuracy.
accuracy_xgb = accuracy_score(y_test, y_pred)
print(f'Accuracy with XGBoost: {accuracy_xgb:.4f}')
print('Classification Report:')
print(classification_report(y_test, y_pred))


# Confusion matrix (rows = true class, columns = predicted class).
conf_matrix_xgb = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix_xgb)


disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix_xgb)
disp.plot(cmap='Blues')
plt.title('Confusion Matrix - XGBoost')
plt.show()

# Precision, recall, and F1 for the positive (diabetic) class.
precision_xgb = precision_score(y_test, y_pred, average='binary')
recall_xgb = recall_score(y_test, y_pred, average='binary')
f1_xgb = f1_score(y_test, y_pred, average='binary')

print(f'Precision: {precision_xgb:.4f}')
print(f'Recall: {recall_xgb:.4f}')
print(f'F1 Score: {f1_xgb:.4f}')

# Positive-class probability, required for the ROC curve.
y_pred_proba_xgb = model_xgb.predict_proba(X_test)[:, 1]

# False/true positive rates at every decision threshold.
fpr_xgb, tpr_xgb, _ = roc_curve(y_test, y_pred_proba_xgb)

# Area under the ROC curve.
auc_xgb = roc_auc_score(y_test, y_pred_proba_xgb)

# ROC curve with the diagonal chance line for reference.
plt.figure(figsize=(10, 6))
plt.plot(fpr_xgb, tpr_xgb, label=f'ROC curve (AUC = {auc_xgb:.2f})')
plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line representing random chance
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve - XGBoost')
plt.legend(loc='lower right')
plt.show()

print(f'Area Under the Curve (AUC): {auc_xgb:.4f}')
Accuracy with XGBoost: 0.8011
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.78      0.80        88
           1       0.79      0.82      0.80        88

    accuracy                           0.80       176
   macro avg       0.80      0.80      0.80       176
weighted avg       0.80      0.80      0.80       176

Confusion Matrix:
[[69 19]
 [16 72]]
No description has been provided for this image
Precision: 0.7912
Recall: 0.8182
F1 Score: 0.8045
No description has been provided for this image
Area Under the Curve (AUC): 0.8594

Light Gradient Boosting¶

In [57]:
# Randomised hyperparameter search for LightGBM: 100 settings sampled from
# the grid below, scored by 5-fold cross-validated accuracy.
param_grid = {
    'num_leaves': [20, 31, 40, 50],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'n_estimators': [100, 200, 500, 1000],
    'max_depth': [-1, 10, 20, 30],                  # -1 = no depth limit
    'subsample': [0.7, 0.8, 0.9, 1.0],
    'colsample_bytree': [0.6, 0.7, 0.8, 0.9, 1.0],
    'reg_alpha': [0, 0.01, 0.1, 0.5],               # L1 regularisation
    'reg_lambda': [0, 0.01, 0.1, 0.5],              # L2 regularisation
}

lgb_model = lgb.LGBMClassifier(random_state=42)

random_search = RandomizedSearchCV(
    estimator=lgb_model,
    param_distributions=param_grid,
    n_iter=100,          # number of sampled parameter settings
    scoring='accuracy',
    cv=5,                # 5-fold cross-validation
    verbose=1,
    random_state=42,     # reproducible sampling of settings
    n_jobs=-1,           # use all available cores
)
random_search.fit(X_train, y_train)

# Keep both the winning parameter dict and the refitted estimator.
best_params = random_search.best_params_
best_model = random_search.best_estimator_
print('Best Hyperparameters: {}'.format(best_params))
Fitting 5 folds for each of 100 candidates, totalling 500 fits
[LightGBM] [Info] Number of positive: 351, number of negative: 351
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000716 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1024
[LightGBM] [Info] Number of data points in the train set: 702, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
Best Hyperparameters: {'subsample': 1.0, 'reg_lambda': 0, 'reg_alpha': 0, 'num_leaves': 40, 'n_estimators': 200, 'max_depth': 20, 'learning_rate': 0.1, 'colsample_bytree': 0.6}
In [58]:
# --- Evaluate the tuned LightGBM model (`best_model`) on the held-out test set ---
# NOTE(review): relies on `best_model`, `X_test`, `y_test` and the metric/plot
# imports from earlier cells; must be run after the LightGBM search cell.
start_time = time.time()
y_pred = best_model.predict(X_test)

end_time = time.time()  
# Wall-clock time for batch prediction only (fit time is measured elsewhere)
prediction_time_lgbm = end_time - start_time
print(f"LGBM prediction Time: {prediction_time_lgbm:.4f} seconds")

# Overall accuracy plus the full per-class report
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:')
print(classification_report(y_test, y_pred))


# Confusion matrix, printed and rendered as a heatmap
conf_matrix_lgb = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix_lgb)

disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix_lgb)
disp.plot(cmap='Blues')
plt.title('Confusion Matrix - LightGBM')
plt.show()

# Precision / recall / F1 for the positive (diabetic) class
precision_lgb = precision_score(y_test, y_pred, average='binary')
recall_lgb = recall_score(y_test, y_pred, average='binary')
f1_lgb = f1_score(y_test, y_pred, average='binary')

print(f'Precision: {precision_lgb:.4f}')
print(f'Recall: {recall_lgb:.4f}')
print(f'F1 Score: {f1_lgb:.4f}')

# Predicted probability of the positive class (column 1), for ROC/AUC
y_pred_proba_lgb = best_model.predict_proba(X_test)[:, 1]

fpr_lgb, tpr_lgb, _ = roc_curve(y_test, y_pred_proba_lgb)

# Area under the ROC curve
auc_lgb = roc_auc_score(y_test, y_pred_proba_lgb)

# ROC curve with the random-chance diagonal for reference
plt.figure(figsize=(10, 6))
plt.plot(fpr_lgb, tpr_lgb, label=f'ROC curve (AUC = {auc_lgb:.2f})')
plt.plot([0, 1], [0, 1], 'k--')  
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve - LightGBM')
plt.legend(loc='lower right')
plt.show()

print(f'Area Under the Curve (AUC): {auc_lgb:.4f}')
LGBM prediction Time: 0.0000 seconds
Accuracy: 0.8125
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.82      0.81        88
           1       0.82      0.81      0.81        88

    accuracy                           0.81       176
   macro avg       0.81      0.81      0.81       176
weighted avg       0.81      0.81      0.81       176

Confusion Matrix:
[[72 16]
 [17 71]]
No description has been provided for this image
Precision: 0.8161
Recall: 0.8068
F1 Score: 0.8114
No description has been provided for this image
Area Under the Curve (AUC): 0.8505
In [59]:
# Permutation feature importance for the tuned LightGBM model: shuffle each
# feature 30 times and measure the resulting drop in test-set accuracy.
perm_result = permutation_importance(
    best_model, X_test, y_test,
    n_repeats=30, random_state=42, scoring='accuracy',
)

mean_drop = perm_result.importances_mean
drop_std = perm_result.importances_std

# Ascending sort so the most important feature lands at the top of the chart
order = np.argsort(mean_drop)
positions = range(len(order))

plt.figure(figsize=(10, 6))
plt.barh(positions, mean_drop[order], xerr=drop_std[order])
plt.yticks(positions, [X_test.columns[i] for i in order])
plt.xlabel('Permutation Importance')
plt.title('Permutation Feature Importance for LightGBM')
plt.show()
No description has been provided for this image
In [60]:
# Fit a LIME explainer on the training distribution, then explain one
# LightGBM prediction locally via its top 10 contributing features.
lime_kwargs = dict(
    mode='classification',
    feature_names=X_train.columns,
    class_names=['Non-Diabetic', 'Diabetic'],
    discretize_continuous=True,
)
explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, **lime_kwargs)

instance_idx = 0  # explain the first test-set row
exp = explainer.explain_instance(
    X_test.values[instance_idx],
    best_model.predict_proba,
    num_features=10,
)

# Render the explanation inline (table of top features only)
exp.show_in_notebook(show_table=True, show_all=False)
In [61]:
# Static matplotlib version of the LIME explanation above
# (relies on `exp` produced in the previous cell).
exp.as_pyplot_figure()
plt.show()
No description has been provided for this image
In [62]:
# Overlay the ROC curves of all tuned models for a side-by-side comparison.
# Requires the (fpr*, tpr*, auc*) triples computed in the per-model cells.
# NOTE(review): `auc` here is the SVM's AUC *value* from an earlier cell; it
# shadows sklearn.metrics.auc if that function was ever imported — verify upstream.
plt.figure(figsize=(12, 8))

# (label, fpr, tpr, auc, color) per model — one data row instead of five
# copy-pasted plt.plot calls.
roc_curves = [
    ('SVM', fpr, tpr, auc, 'blue'),
    ('Random Forest', fpr_rf, tpr_rf, auc_rf, 'green'),
    ('KNN', fpr_knn, tpr_knn, auc_knn, 'orange'),
    ('XGBoost', fpr_xgb, tpr_xgb, auc_xgb, 'red'),
    ('LightGBM', fpr_lgb, tpr_lgb, auc_lgb, 'purple'),
]
for model_name, fpr_vals, tpr_vals, auc_val, color in roc_curves:
    plt.plot(fpr_vals, tpr_vals, label=f'{model_name} (AUC = {auc_val:.2f})', color=color)

# Diagonal: random-chance baseline
plt.plot([0, 1], [0, 1], 'k--')

plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curves for Different Models')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()
No description has been provided for this image

Voting Classifier¶

In [64]:
# Re-instantiate each tuned model with the best hyperparameters reported by
# the earlier searches (values copied from the search-result outputs above).
best_knn = KNeighborsClassifier(n_neighbors=7, weights='distance', metric='manhattan')
best_svm = SVC(C=10, gamma='scale', kernel='rbf', probability=True, random_state=42)
# NOTE(review): best_rf is constructed here but NOT included in the voting
# ensemble below — confirm whether Random Forest was excluded on purpose.
best_rf = RandomForestClassifier(n_estimators=200, max_depth=None, min_samples_split=2, min_samples_leaf=1, bootstrap=False, random_state=42)
best_lgb = lgb.LGBMClassifier(
    subsample=1.0,
    reg_lambda=0,
    reg_alpha=0,
    num_leaves=40,
    n_estimators=200,
    max_depth=20,
    learning_rate=0.1,
    colsample_bytree=0.6,
    random_state=42
)

# Soft voting: average the predicted class probabilities of KNN, SVM and LightGBM
voting_clf = VotingClassifier(
    estimators=[
        ('knn', best_knn),
        ('svm', best_svm),
        ('lgb', best_lgb)
    ],
    voting='soft')

# Fit on the training split (fit is intentionally outside the timed region)
voting_clf.fit(X_train, y_train)

# Time batch prediction only
start_time = time.time()

y_pred = voting_clf.predict(X_test)

end_time = time.time()  
prediction_time_voting = end_time - start_time
print(f"Voting Classifier prediction Time: {prediction_time_voting:.4f} seconds")
[LightGBM] [Info] Number of positive: 351, number of negative: 351
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001528 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1024
[LightGBM] [Info] Number of data points in the train set: 702, number of used features: 6
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
Voting Classifier prediction Time: 0.0303 seconds
In [65]:
# --- Evaluate the soft-voting ensemble on the test set ---
# NOTE(review): `y_pred` comes from the previous cell's voting_clf.predict call;
# this cell must run directly after it (y_pred is reused for several models).
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of Voting Classifier: {accuracy:.4f}')
print('Classification Report:')
print(classification_report(y_test, y_pred))

# Confusion matrix, printed and rendered as a heatmap
conf_matrix_voting = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix_voting)
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix_voting)
disp.plot(cmap='Blues')
plt.title('Confusion Matrix - Voting Classifier')
plt.show()

# Precision / recall / F1 for the positive (diabetic) class
precision_voting = precision_score(y_test, y_pred, average='binary')
recall_voting = recall_score(y_test, y_pred, average='binary')
f1_voting = f1_score(y_test, y_pred, average='binary')
print(f'Precision: {precision_voting:.4f}')
print(f'Recall: {recall_voting:.4f}')
print(f'F1 Score: {f1_voting:.4f}')

# Positive-class probabilities for ROC/AUC
y_pred_proba_voting = voting_clf.predict_proba(X_test)[:, 1]
fpr_voting, tpr_voting, _ = roc_curve(y_test, y_pred_proba_voting)
auc_voting = roc_auc_score(y_test, y_pred_proba_voting)

# ROC curve with the random-chance diagonal for reference
plt.figure(figsize=(10, 6))
plt.plot(fpr_voting, tpr_voting, label=f'ROC curve (AUC = {auc_voting:.2f})')
plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line representing random chance
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve - Voting Classifier')
plt.legend(loc='lower right')
plt.show()

print(f'Area Under the Curve (AUC): {auc_voting:.4f}')
Accuracy of Voting Classifier: 0.8523
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.84      0.85        88
           1       0.84      0.86      0.85        88

    accuracy                           0.85       176
   macro avg       0.85      0.85      0.85       176
weighted avg       0.85      0.85      0.85       176

Confusion Matrix:
[[74 14]
 [12 76]]
No description has been provided for this image
Precision: 0.8444
Recall: 0.8636
F1 Score: 0.8539
No description has been provided for this image
Area Under the Curve (AUC): 0.8733
In [66]:
# Permutation importance of the soft-voting ensemble: accuracy drop on the
# test set when each feature is independently shuffled (30 repeats).
voting_perm = permutation_importance(
    voting_clf, X_test, y_test,
    n_repeats=30, random_state=42, scoring='accuracy',
)

mean_importance = voting_perm.importances_mean
importance_std = voting_perm.importances_std

# Ascending order puts the most important feature at the top of the bar chart
ranking = np.argsort(mean_importance)
bar_positions = range(len(ranking))

plt.figure(figsize=(10, 6))
plt.barh(bar_positions, mean_importance[ranking], xerr=importance_std[ranking])
plt.yticks(bar_positions, [X_test.columns[i] for i in ranking])
plt.xlabel('Permutation Importance')
plt.title('Permutation Feature Importance for Voting Classifier')
plt.show()
No description has been provided for this image
In [67]:
# LIME explanation of one voting-classifier prediction: fit the explainer on
# the training data, then locally explain a single test instance.
explainer = lime.lime_tabular.LimeTabularExplainer(
    training_data=X_train.values,
    mode='classification',
    feature_names=X_train.columns,
    class_names=['Non-Diabetic', 'Diabetic'],
    discretize_continuous=True,
)

instance_idx = 0  # You can choose any instance
sample = X_test.values[instance_idx]
exp = explainer.explain_instance(sample, voting_clf.predict_proba, num_features=10)

exp.show_in_notebook(show_table=True, show_all=False)
In [68]:
# Static matplotlib version of the LIME explanation above
# (relies on `exp` produced in the previous cell).
exp.as_pyplot_figure()
plt.show()
No description has been provided for this image

PDP¶

In [69]:
# --- Partial Dependence Plots (PDP) for every feature of the voting ensemble ---
features = X_train.columns

# Grid layout: 2 columns, as many rows as needed (ceiling division)
n_features = len(features)
n_cols = 2  # Number of columns for subplots
n_rows = (n_features + n_cols - 1) // n_cols

fig, ax = plt.subplots(n_rows, n_cols, figsize=(14, n_rows * 5))

# BUG FIX: plt.subplots returns a 2-D Axes array for multiple rows, a 1-D
# array for a single row with several columns, and a bare Axes for 1x1.
# The old `ax = [ax]` branch wrapped the 1-D array in a list, so with
# n_rows == 1 and n_cols == 2 the loop indexed the wrong object (ax[0] was
# the whole array and ax[1] raised IndexError). np.atleast_1d(...).ravel()
# normalises every case to a flat 1-D sequence of Axes.
ax = np.atleast_1d(ax).ravel()

colors = sns.color_palette("husl", len(features))

# One PDP per feature, each on its own subplot
for i, feature in enumerate(features):
    PartialDependenceDisplay.from_estimator(
        voting_clf, 
        X_train, 
        [feature], 
        ax=ax[i], 
        grid_resolution=50, 
        line_kw={"color": colors[i]}
    )

    ax[i].set_title(f'Partial Dependence of {feature}', fontsize=14)
    ax[i].grid(True, linestyle='--', color='gray')
    ax[i].spines['top'].set_visible(False)
    ax[i].spines['right'].set_visible(False)

# Hide any unused trailing subplot(s) in the grid.
# (range starts at n_features, not i+1: avoids NameError when features is empty)
for j in range(n_features, len(ax)):
    ax[j].set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image

Counterfactual Explanation¶

In [70]:
# Feature / class metadata reused by the DiCE counterfactual explainer below
feature_names = X_train.columns.tolist()
class_names = ['Non-Diabetic', 'Diabetic']  # Adjust based on your target classes
In [71]:
# DiCE needs one DataFrame containing both the features and the outcome column,
# so append y_train to a copy of X_train (the copy keeps X_train untouched).
data = X_train.copy()
data['Outcome'] = y_train

# All features here are numeric/scaled, so every column is declared continuous
dice_data = dice_ml.Data(dataframe=data, continuous_features=feature_names, outcome_name='Outcome')

# Wrap the fitted voting ensemble for DiCE's sklearn backend
model_dice = dice_ml.Model(model=voting_clf, backend="sklearn")

# Counterfactual generator using random sampling of candidate instances
dice_exp = Dice(dice_data, model_dice, method="random")
In [72]:
# Explain the first test row; iloc[0:1] keeps it a one-row DataFrame (DiCE
# expects a frame, not a Series).
test_instance = X_test.iloc[0:1]

# Generate 5 diverse counterfactuals that flip the predicted class
counterfactuals = dice_exp.generate_counterfactuals(test_instance, total_CFs=5, desired_class="opposite")
counterfactuals.visualize_as_dataframe(show_only_changes=True)
100%|██████████| 1/1 [00:00<00:00,  3.51it/s]
Query instance (original outcome : 0)

Pregnancies Glucose BMI DiabetesPedigreeFunction Age glucose_bmi_interaction Outcome
0 -1.167546 0.236358 -0.716416 0.342251 -1.058093 -0.293964 0
Diverse Counterfactual set (new outcome: 1)
Pregnancies Glucose BMI DiabetesPedigreeFunction Age glucose_bmi_interaction Outcome
0 - - - - 1.44487045 - 1.0
1 - - 0.35143044 - - - 1.0
2 - - - - 2.45898982 - 1.0
3 - - - - 0.96178267 - 1.0
4 - - - - 1.66822879 - 1.0
In [73]:
# Extract the raw counterfactual rows (for the single query instance, index 0)
# as a plain DataFrame for further inspection.
cf_df = counterfactuals.cf_examples_list[0].final_cfs_df
print(cf_df)
   Pregnancies   Glucose       BMI  DiabetesPedigreeFunction       Age  \
0    -1.167546  0.236358 -0.716416                  0.342251  1.444870   
1    -1.167546  0.236358  0.351430                  0.342251 -1.058093   
2    -1.167546  0.236358 -0.716416                  0.342251  2.458990   
3    -1.167546  0.236358 -0.716416                  0.342251  0.961783   
4    -1.167546  0.236358 -0.716416                  0.342251  1.668229   

   glucose_bmi_interaction  Outcome  
0                -0.293964        1  
1                -0.293964        1  
2                -0.293964        1  
3                -0.293964        1  
4                -0.293964        1  

Stacking Classifier¶

In [74]:
# Stacking ensemble: the tuned KNN / SVM / LightGBM models feed their
# out-of-fold predictions into a logistic-regression meta-learner.
lgbm_params = dict(
    subsample=1.0,
    reg_lambda=0,
    reg_alpha=0,
    num_leaves=40,
    n_estimators=200,
    max_depth=20,
    learning_rate=0.1,
    colsample_bytree=0.6,
    random_state=42,
)

base_models = [
    ('knn', KNeighborsClassifier(n_neighbors=7, weights='distance', metric='manhattan')),
    ('svm', SVC(C=10, gamma='scale', kernel='rbf', probability=True, random_state=42)),
    ('lgb', lgb.LGBMClassifier(**lgbm_params)),
]

# Level-2 model that combines the base-model predictions
meta_model = LogisticRegression(random_state=42)

# 5-fold CV for the out-of-fold predictions, using all available cores
stacking_clf = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    cv=5,
    n_jobs=-1,
)
In [75]:
# Training the model
stacking_clf.fit(X_train, y_train)

# Time only the inference step (training cost is deliberately excluded)
start_time = time.time()

y_pred = stacking_clf.predict(X_test)

end_time = time.time()
prediction_time_stacking = end_time - start_time
# Fixed typo: "Classifer" -> "Classifier"
print(f"Stacking Classifier prediction Time: {prediction_time_stacking:.4f} seconds")

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of Stacking Classifier: {accuracy:.4f}')
print('Classification Report:')
print(classification_report(y_test, y_pred))


# Confusion Matrix
conf_matrix_stack = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(conf_matrix_stack)

disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix_stack)
disp.plot(cmap='Blues')
plt.title('Confusion Matrix - Stacking Classifier')
plt.show()

# Precision, recall, and F1 score for the positive (diabetic) class
precision_stack = precision_score(y_test, y_pred, average='binary')
recall_stack = recall_score(y_test, y_pred, average='binary')
f1_stack = f1_score(y_test, y_pred, average='binary')

print(f'Precision: {precision_stack:.4f}')
print(f'Recall: {recall_stack:.4f}')
print(f'F1 Score: {f1_stack:.4f}')

# Predicted probability of the positive class, required for ROC/AUC
y_pred_proba_stack = stacking_clf.predict_proba(X_test)[:, 1]

# False positive rates and true positive rates across thresholds
fpr_stack, tpr_stack, _ = roc_curve(y_test, y_pred_proba_stack)

# AUC
auc_stack = roc_auc_score(y_test, y_pred_proba_stack)

# AUC-ROC curve
plt.figure(figsize=(10, 6))
plt.plot(fpr_stack, tpr_stack, label=f'ROC curve (AUC = {auc_stack:.2f})')
plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line representing random chance
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve - Stacking Classifier')
plt.legend(loc='lower right')
plt.show()

print(f'Area Under the Curve (AUC): {auc_stack:.4f}')
Stacking Classifer prediction Time: 0.0121 seconds
Accuracy of Stacking Classifier: 0.8409
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.84      0.84        88
           1       0.84      0.84      0.84        88

    accuracy                           0.84       176
   macro avg       0.84      0.84      0.84       176
weighted avg       0.84      0.84      0.84       176

Confusion Matrix:
[[74 14]
 [14 74]]
No description has been provided for this image
Precision: 0.8409
Recall: 0.8409
F1 Score: 0.8409
No description has been provided for this image
Area Under the Curve (AUC): 0.8744
In [76]:
# Overlay the ROC curves of both ensembles on a single set of axes
plt.figure(figsize=(10, 6))
plt.plot(fpr_voting, tpr_voting, color='blue',
         label=f'Voting Classifier ROC curve (AUC = {auc_voting:.2f})')
plt.plot(fpr_stack, tpr_stack, color='green',
         label=f'Stacking Classifier ROC curve (AUC = {auc_stack:.2f})')
plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve - Voting vs Stacking Classifier')
plt.legend(loc='lower right')
plt.show()


print(f'Area Under the Curve (AUC) for Voting Classifier: {auc_voting:.4f}')
print(f'Area Under the Curve (AUC) for Stacking Classifier: {auc_stack:.4f}')
No description has been provided for this image
Area Under the Curve (AUC) for Voting Classifier: 0.8733
Area Under the Curve (AUC) for Stacking Classifier: 0.8744
In [77]:
# Permutation feature importance: mean accuracy drop over 30 shuffles per column
perm_importance = permutation_importance(
    stacking_clf, X_test, y_test,
    n_repeats=30, random_state=42, scoring='accuracy',
)

feature_names = X_test.columns
importances = perm_importance.importances_mean
std = perm_importance.importances_std

# Order bars from least to most important (bottom-up in barh)
sorted_idx = np.argsort(importances)
ypos = range(len(sorted_idx))
plt.figure(figsize=(10, 6))
plt.barh(ypos, importances[sorted_idx], xerr=std[sorted_idx])
plt.yticks(ypos, [feature_names[i] for i in sorted_idx])
plt.xlabel('Permutation Importance')
plt.title('Permutation Feature Importance for Stacking Classifier')
plt.show()
No description has been provided for this image
In [78]:
# LIME surrogate explainer, fit to the training feature distribution
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train.values,
    feature_names=X_train.columns,
    class_names=['Non-Diabetic', 'Diabetic'],
    mode='classification',
    discretize_continuous=True,
)

instance_idx = 0  # explain the first test row
exp = explainer.explain_instance(
    X_test.values[instance_idx],
    stacking_clf.predict_proba,
    num_features=10,
)

exp.show_in_notebook(show_table=True, show_all=False)
In [79]:
# Render the LIME explanation as a static matplotlib bar chart
exp.as_pyplot_figure()
plt.show()
No description has been provided for this image

PDP¶

In [80]:
features = X_train.columns

n_features = len(features)
n_cols = 2  # Number of columns for subplots
n_rows = (n_features + n_cols - 1) // n_cols  # ceil(n_features / n_cols)

# Creating PDP plots for all features.
# squeeze=False guarantees a 2-D axes array for every grid shape; the previous
# `ax = [ax]` wrapping was wrong when n_rows == 1 and n_cols > 1, because
# plt.subplots then returns a 1-D array whose wrapper made ax[0] an array.
fig, ax = plt.subplots(n_rows, n_cols, figsize=(14, n_rows * 5), squeeze=False)
ax = ax.flatten()

colors = sns.color_palette("husl", len(features))

# Plotting each feature separately using PartialDependenceDisplay
for i, feature in enumerate(features):
    PartialDependenceDisplay.from_estimator(
        stacking_clf, 
        X_train, 
        [feature], 
        ax=ax[i], 
        grid_resolution=50, 
        line_kw={"color": colors[i]}
    )

    ax[i].set_title(f'Partial Dependence of {feature}', fontsize=14)
    ax[i].grid(True, linestyle='--', color='gray')
    ax[i].spines['top'].set_visible(False)
    ax[i].spines['right'].set_visible(False)

# Hide any unused axes in the grid. Iterating from n_features (instead of the
# leaked loop variable i) also stays well-defined when `features` is empty.
for j in range(n_features, len(ax)):
    ax[j].set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image

Counterfactual Explanation¶

In [81]:
feature_names = X_train.columns.tolist()  # plain list of feature labels, as DiCE expects
class_names = ['Non-Diabetic', 'Diabetic']  # Adjust based on your target classes
In [82]:
# Assemble training features plus the label into the single frame DiCE expects
data = X_train.copy()
data['Outcome'] = y_train

# DiCE wiring: data schema, sklearn model wrapper, and the explainer itself
dice_data = dice_ml.Data(dataframe=data, continuous_features=feature_names, outcome_name='Outcome')
model_dice = dice_ml.Model(model=stacking_clf, backend="sklearn")
dice_exp = Dice(dice_data, model_dice, method="random")

# Slice (not index) so DiCE receives a 1-row DataFrame
test_instance = X_test.iloc[0:1]

# Ask for five diverse counterfactuals that flip the predicted class
counterfactuals = dice_exp.generate_counterfactuals(
    test_instance, total_CFs=5, desired_class="opposite"
)

counterfactuals.visualize_as_dataframe(show_only_changes=True)

# Materialize the counterfactual set as a plain DataFrame
cf_df = counterfactuals.cf_examples_list[0].final_cfs_df
print(cf_df)
100%|██████████| 1/1 [00:00<00:00,  4.12it/s]
Query instance (original outcome : 0)

Pregnancies Glucose BMI DiabetesPedigreeFunction Age glucose_bmi_interaction Outcome
0 -1.167546 0.236358 -0.716416 0.342251 -1.058093 -0.293964 0
Diverse Counterfactual set (new outcome: 1)
Pregnancies Glucose BMI DiabetesPedigreeFunction Age glucose_bmi_interaction Outcome
0 - - - - 1.8985748 - 1.0
1 - - - - 0.70265673 - 1.0
2 - - - - 1.04099345 - 1.0
3 - - 0.87125753 - 0.72253209 - 1.0
4 - - - - 0.86099638 - 1.0
   Pregnancies   Glucose       BMI  DiabetesPedigreeFunction       Age  \
0    -1.167546  0.236358 -0.716416                  0.342251  1.898575   
1    -1.167546  0.236358 -0.716416                  0.342251  0.702657   
2    -1.167546  0.236358 -0.716416                  0.342251  1.040993   
3    -1.167546  0.236358  0.871258                  0.342251  0.722532   
4    -1.167546  0.236358 -0.716416                  0.342251  0.860996   

   glucose_bmi_interaction  Outcome  
0                -0.293964        1  
1                -0.293964        1  
2                -0.293964        1  
3                -0.293964        1  
4                -0.293964        1  
In [ ]: